In [1]:
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import numpy as np
import scipy.stats as ss
from scipy.stats import norm
import seaborn as sns
In [2]:
np.random.seed(12345678) # for reproducibility, set random seed
# Read in data
df = pd.read_csv('../output.csv')
nvox = 64*64*48 # assume number of voxels per bin
df['weighted'] = df['synapses']/df['unmasked']*nvox
xvals = df['cx'].unique()
yvals = df['cy'].unique()
zvals = df['cz'].unique()
# Get rid of the blank edges
left = 0;
right = len(xvals);
top = 0;
bottom = len(yvals);
for z in zvals:
this_z = df[df['cz']==z]
# X direction
xhist, bin_edges = np.histogram(this_z['cx'], weights = this_z['unmasked']/(nvox*len(yvals)), bins=len(xvals))
left = max(left, np.argmax(xhist>0.5))
right = min(right, len(xvals)-np.argmax(xhist[::-1]>0.5))
# Y direction
yhist, bin_edges = np.histogram(this_z['cy'], weights = this_z['unmasked']/(nvox*len(xvals)), bins=len(yvals))
top = max(top, np.argmax(yhist>0.5))
bottom = min(bottom, len(yvals)-np.argmax(yhist[::-1]>0.5))
# Copy new dataset without edges
df2 = df.copy()
for z in zvals:
df2.drop(df2.index[(df2['cx']<xvals[left]) | (df2['cx']>=xvals[right])], inplace=True)
df2.drop(df2.index[(df2['cy']<yvals[top]) | (df2['cy']>=yvals[bottom])], inplace=True)
print "There are", len(df2), "bins after removing the edges."
xvals = df2['cx'].unique()
yvals = df2['cy'].unique()
zvals = df2['cz'].unique()
In [3]:
byY = pd.pivot_table(df2, index=['cx','cz'], columns='cy', values='weighted', aggfunc=np.sum)
byY.hist(bins=40, figsize=(16,16));
Divide Y-layers into four groups arbitrarily.
In [4]:
ygroups = np.array([0]*10+[1]*9+[2]*10+[3]*9)
df2['ygroup'] = 0
for y, ygrp in zip(yvals, ygroups):
df2.loc[df2['cy']==y, 'ygroup'] = ygrp
df2.head()
Out[4]:
In [5]:
byYgrp = pd.pivot_table(df2, index=['cx','cz'], columns='ygroup', values='weighted', aggfunc=np.sum)
byYgrp.hist(alpha=0.5, bins=40, figsize=(8,6));
plt.show()
byYgrp.plot.hist(alpha=0.5, bins=40);
plt.show()
In [6]:
plt.figure()
for i in range(4):
plt.subplot(221+i)
ss.probplot(df2[df2['ygroup']==i]['weighted'], plot = plt)
plt.title('Y group '+str(i))
plt.tight_layout()
plt.show()
The distribution of synapse densities in the Y groups appears close to normal.
In [7]:
for i in range(4):
for j in range(i+1,4):
stat, p = ss.ranksums(df2[df2['ygroup']==i]['weighted'], df2[df2['ygroup']==j]['weighted'])
print "Difference between group", i, "and group", j, ": W =", stat, ", p =", p
In [8]:
xgroups = np.array([0]*8+[1]*8+[2]*7+[3]*8+[4]*7+[5]*8+[6]*8+[7]*8+[8]*7+[9]*8+[10]*7)
zgroups = np.array(range(0, 11))
df2['xgroup'] = 0
df2['zgroup'] = 0
for x, xgrp in zip(xvals, xgroups):
df2.loc[df2['cx'] == x, 'xgroup'] = xgrp
for z, zgrp in zip(zvals, zgroups):
df2.loc[df2['cz'] == z, 'zgroup'] = zgrp
df3 = df2.groupby(['xgroup', 'ygroup', 'zgroup']).mean()
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn import cross_validation
from sklearn.cross_validation import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
data = df3['weighted']
est = KMeans(n_clusters = 3)
est.fit(data.reshape(len(data),1))
labels = est.labels_
df3['label'] = labels
fea = df3[['cx','cy','cz']]
sns.set_style('white')
cmap = np.array(sns.color_palette("Set1", 3))
colors = [cmap[label] for label in labels]
ax = plt.figure().gca(projection='3d')
patches = ax.scatter(xs=fea['cx'], ys=fea['cy'], zs=fea['cz'],
s=20, c=colors,
edgecolors=None)
plt.xlabel('X')
plt.ylabel('Y')
plt.gca().set_zlabel('Z')
plt.title('K-means clustering of 3D bins')
plt.xticks(df3['cx'].unique()[::3])
plt.yticks(df3['cy'].unique())
ax.zaxis.set_ticks(df3['cz'].unique()[::3])
plt.show()
plt.figure()
sns.boxplot(x="label", y="weighted", data=df3)
plt.xlabel('Group')
plt.ylabel('Weighted number of synapses')
plt.title('K-means clustered groups')
plt.show()
In [9]:
from sklearn import cross_validation
from sklearn.cross_validation import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
names = ["Nearest Neighbors", "Linear SVM", "Random Forest",
"Linear Discriminant Analysis", "Quadratic Discriminant Analysis"]
classifiers = [
KNeighborsClassifier(3),
SVC(kernel="linear", C = 0.001),
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
LinearDiscriminantAnalysis(),
QuadraticDiscriminantAnalysis()]
features = fea
y = labels
accuracy=np.zeros((len(classifiers),2))
for idx, cla in enumerate(classifiers):
X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, y, test_size = 0.4, random_state = 0)
clf = cla.fit(X_train, y_train)
loo = LeaveOneOut(len(features))
scores = cross_validation.cross_val_score(clf, features, y, cv = 2)
accuracy[idx,] = [scores.mean(), scores.std()]
print("Accuracy of %s: %0.2f (+/- %0.2f)" % (names[idx], scores.mean(), scores.std() * 2))
In [10]:
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import cm as CM
from matplotlib import mlab as ML
import pandas as pd
import numpy as np
import itertools
from scipy import stats
from scipy.stats import poisson
from scipy.stats import lognorm
from decimal import *
from mpl_toolkits.mplot3d import Axes3D
from pylab import *
import sklearn.mixture
import scipy.stats as ss
import seaborn as sns
# Read in data
df = pd.read_csv('../output.csv')
nvox = 64*64*48
dfcut = df[(df['cx'] > 400) & (df['cx'] < 3800) & (df['cy'] < 2800)]
dfcut['weighted'] = dfcut['synapses'] / dfcut['unmasked'] * nvox
dfftr = dfcut[dfcut['unmasked'] > nvox * 0.5]
In [11]:
byZgrp = pd.pivot_table(dfftr, index = ['cx','cy'], columns = 'cz', values = 'weighted', aggfunc=np.sum)
byZgrp.hist(alpha = 0.5, bins = 40, figsize = (12, 10))
plt.xlabel('X-coordinate')
plt.ylabel('Z-coordinate')
plt.show()
In [12]:
zgroup = dfftr['cz'].unique()
for i in range(11):
for j in range(i + 1, 11):
stat, p = ss.ranksums(dfftr[dfftr['cz'] == zgroup[i]]['weighted'], dfftr[dfftr['cz'] == zgroup[j]]['weighted'])
print "Difference between group", i, "and group", j, ": W =", stat, ", p =", p
In [13]:
scy = sorted(dfftr['cy'].unique())
sy1 = dfftr[(dfftr['cy'] >= scy[0]) & (dfftr['cy'] <= scy[9])]
sy2 = dfftr[(dfftr['cy'] >= scy[9]) & (dfftr['cy'] <= scy[19])]
sy3 = dfftr[(dfftr['cy'] >= scy[19]) & (dfftr['cy'] <= scy[29])]
sy4 = dfftr[(dfftr['cy'] >= scy[29]) & (dfftr['cy'] <= scy[36])]
gp = [sy1, sy2, sy3, sy4]
for i in range(1, 5):
print "Mean and SD of Group", i, "in Y-layer is", np.mean(gp[i-1]['weighted']), "and", sqrt(np.var(gp[i-1]['weighted']))
In [14]:
#data.groupby(['col1', 'col2'])['col3'].mean()
#print sy1.groupby(['cx','cz']).sum()
#print sy1
xval = sy1['cx'].unique()
zval = sy1['cz'].unique()
ncx = []
ncz = []
nval = []
for r in [sy1, sy2, sy3, sy4]:
out = [0, 0, 0]
for i in xval:
for j in zval:
val = r[(r['cx'] == i) & (r['cz'] == j)]['weighted'].mean()
out = vstack((out, [i, j, val]))
df = pd.DataFrame(out)
df.columns = ['ncx', 'ncz', 'nval']
m, n = df.shape
df = df[1:m]
#print
norm = mpl.colors.Normalize(vmin = min(df['nval']), vmax = max(df['nval']))
cmap = cm.rainbow
plt.figure(figsize = (16, 5))
plt.scatter(df['ncx'], df['ncz'], s = 50, cmap = cmap, c = df['nval'], norm = norm)
plt.xlabel('X-coordinate')
plt.ylabel('Z-coordinate')
plt.title('Density plot of Y-layer')
plt.colorbar()
plt.show()
In [15]:
xval = dfftr['cx'].unique()
yval = dfftr['cy'].unique()
zval = dfftr['cz'].unique()
ncx = []
ncz = []
nval = []
output = [0, 0, 0]
for i in xval:
for j in yval:
val = r[(r['cx'] == i) & (r['cy'] == j)]['weighted'].mean()
output = vstack((output, [i, j, val]))
dfout = pd.DataFrame(output)
dfout.columns = ['ncx', 'ncy', 'nval']
m, n = dfout.shape
dfout = dfout[1:m]
xval = np.hstack((xval[40:87], xval[1:39]))
for i in xval:
xw = dfout[dfout['ncx'] == i]
line, = plt.plot(range(0, len(xw['ncx'])), xw['nval'], '--', linewidth = 2)
if i == 3763:
plt.xlabel('Y position (increaseing order)')
plt.ylabel('Number of synampses')
plt.title('Number of synpases along Y-coordinate')
plt.show()
for i in yval:
xw = dfout[dfout['ncy'] == i]
line, = plt.plot(range(0, len(xw['ncy'])), xw['nval'], '--', linewidth = 2)
if i == 2773:
plt.xlabel('X position (increaseing order)')
plt.ylabel('Number of synampses')
plt.title('Number of synpases along X-coordinate')
plt.show()
In [16]:
np.random.seed(12345678) # for reproducibility, set random seed
# Read in data
df = pd.read_csv('../output.csv')
nvox = 64*64*48 # assume number of voxels per bin
df['weighted'] = df['synapses']/df['unmasked']*nvox
xvals = df['cx'].unique()
yvals = df['cy'].unique()
zvals = df['cz'].unique()
# Get rid of the blank edges
left = 0;
right = len(xvals);
top = 0;
bottom = len(yvals);
for z in zvals:
this_z = df[df['cz']==z]
# X direction
xhist, bin_edges = np.histogram(this_z['cx'], weights = this_z['unmasked']/(nvox*len(yvals)), bins=len(xvals))
left = max(left, np.argmax(xhist>0.5))
right = min(right, len(xvals)-np.argmax(xhist[::-1]>0.5))
# Y direction
yhist, bin_edges = np.histogram(this_z['cy'], weights = this_z['unmasked']/(nvox*len(xvals)), bins=len(yvals))
top = max(top, np.argmax(yhist>0.5))
bottom = min(bottom, len(yvals)-np.argmax(yhist[::-1]>0.5))
# Copy new dataset without edges
df2 = df.copy()
for z in zvals:
df2.drop(df2.index[(df2['cx']<xvals[left]) | (df2['cx']>=xvals[right])], inplace=True)
df2.drop(df2.index[(df2['cy']<yvals[top]) | (df2['cy']>=yvals[bottom])], inplace=True)
print "There are", len(df2), "bins after removing the edges."
xvals = df2['cx'].unique()
yvals = df2['cy'].unique()
zvals = df2['cz'].unique()
In [17]:
from skimage import filters
S = np.array([
pd.pivot_table(df2[df2['cx']==x], index='cy', columns='cz', values='weighted', aggfunc=np.sum).values
for x in xvals
])
sigmas = [0, 0.5, 1, 1.5]
for sigma in sigmas:
plt.figure(figsize=(16,8))
print 'Gaussian filter with sigma =', sigma
for zi, z in enumerate(zvals):
filtXY = pd.DataFrame(filters.gaussian(S[:,:,zi].T, sigma=sigma), index=yvals, columns=xvals)
plt.subplot(3,4,1+zi)
sns.heatmap(filtXY, xticklabels=20, yticklabels=10, vmin=0, vmax=500, cbar=False)
plt.gca().set_title('Z = '+str(z))
plt.gca().set_xlabel('X')
plt.gca().set_ylabel('Y')
plt.tight_layout()
plt.show()
In [33]:
from skimage.feature import greycomatrix, greycoprops
patch_dim = [14, 12]
df2['patch'] = -1
patch = 0;
for i in range(0,len(xvals)-1,patch_dim[0]):
for j in range(1,len(yvals)-1,patch_dim[1]):
left = xvals[i]
right = xvals[i+patch_dim[0]-1]
top = yvals[j]
bottom = yvals[j+patch_dim[1]-1]
print "patch", patch, ", x bounds: (", left, ",", right, ") , y bounds: (", top, ",", bottom, ")"
df2.loc[(df2['cx']>=left) & (df['cx']<right) & (df2['cy']>=top) & (df2['cy']<bottom), 'patch'] = patch
patch = patch+1
print patch, "patches labeled"
npatch = patch
In [19]:
z = zvals[4]
print "Patches for Z =", z
plt.figure(figsize=(16,12))
for patch in range(npatch):
pdf = pd.pivot_table(df2[(df2['cz']==z) & (df2['patch']==patch)],
index='cy', columns='cx', values='weighted', aggfunc=np.sum)
plt.subplot(4,5,1+patch)
sns.heatmap(pdf, xticklabels=5, yticklabels=5, vmin=0, vmax=500, cbar=False)
plt.tight_layout()
plt.show()
In [20]:
z = zvals[4]
levels_list = [5, 10, 20, 40]
for levels in levels_list:
print "GLCM for Z =", z, ", number of levels =", levels
glcm = []
vmin = float('inf')
vmax = -float('inf')
plt.figure(figsize=(16,12))
for patch in range(npatch):
pdf = pd.pivot_table(df2[(df2['cz']==z) & (df2['patch']==patch)],
index='cy', columns='cx', values='weighted', aggfunc=np.sum)
pim = np.rint((levels-1)*pdf.values/500).astype(np.uint8)
pim[pim >= levels] = levels-1
glcm.append(greycomatrix(pim, [3], [0], levels=levels, symmetric=True, normed=True).reshape(levels,levels))
vmin = np.min([vmin, np.min(glcm[-1].ravel())])
vmax = np.max([vmax, np.max(glcm[-1].ravel())])
for patch in range(npatch):
plt.subplot(4,5,1+patch)
sns.heatmap(glcm[patch], xticklabels=5, yticklabels=5, vmin=vmin, vmax=vmax, cbar=False)
plt.tight_layout()
plt.show()
In [21]:
z = zvals[4]
levels = 20
dist_list = [1, 3, 5]
ang_list = [0, np.pi/4, np.pi/2, 3*np.pi/4, np.pi]
for patch in range(0,npatch,6):
print "Patch =", patch
glcm = []
vmin = float('inf')
vmax = -float('inf')
pdf = pd.pivot_table(df2[(df2['cz']==z) & (df2['patch']==patch)],
index='cy', columns='cx', values='weighted', aggfunc=np.sum)
pim = np.rint((levels-1)*pdf.values/500).astype(np.uint8)
pim[pim >= levels] = levels-1
glcm = greycomatrix(pim, dist_list, ang_list, levels=levels, symmetric=True, normed=True)
# vmin = np.min([vmin, np.min(glcm[-1].ravel())])
# vmax = np.max([vmax, np.max(glcm[-1].ravel())])
plt.figure(figsize=(16,8))
fidx = 0
for i, dist in enumerate(dist_list):
for j, ang in enumerate(ang_list):
plt.subplot(len(dist_list),len(ang_list),1+fidx)
sns.heatmap(glcm[:,:,i,j].reshape(levels,levels),
xticklabels=5, yticklabels=5, vmin=0, vmax=np.max(glcm.ravel()), cbar=False)
plt.title("Distance = "+str(dist)+", Angle = "+str(ang))
fidx = fidx+1
plt.tight_layout()
plt.show()
In [30]:
z = zvals[4]
levels = 20
dist = 3
ang = np.pi/2
props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
nprops = len(props)
df_list = []
for j, z in enumerate(zvals):
patchprops = np.zeros((npatch, nprops))
xdata = []
ydata = []
for patch in range(npatch):
pdf = pd.pivot_table(df2[(df2['cz']==z) & (df2['patch']==patch)],
index='cy', columns='cx', values='weighted', aggfunc=np.sum)
pim = np.rint((levels-1)*pdf.values/500).astype(np.uint8)
pim[pim >= levels] = levels-1
glcm = greycomatrix(pim, [dist], [ang], levels=levels, symmetric=True, normed=True)
xdata.append(np.mean(pdf.columns.values))
ydata.append(np.mean(pdf.index.values))
for i, prop in enumerate(props):
# print greycoprops(glcm, prop)[0, 0]
patchprops[patch,i] = greycoprops(glcm, prop)[0, 0]
tempdf = pd.DataFrame(patchprops, index=range(npatch), columns=props)
tempdf['patch'] = tempdf.index
tempdf['cx'] = xdata
tempdf['cy'] = ydata
tempdf['cz'] = z
df_list.append(tempdf)
ppdf = pd.concat(df_list)
ppdf.head()
Out[30]:
In [34]:
g = sns.PairGrid(ppdf, hue='cz', vars=props)
g = g.map_diag(plt.hist)
g = g.map_offdiag(plt.scatter)
In [24]:
from skimage.filters.rank import entropy
from skimage.morphology import disk
diams = [5, 10, 15, 20]
for d in diams:
plt.figure(figsize=(16,8))
print 'Entropy with disk =', d
entXY = []
vmin = float('inf')
vmax = -float('inf')
for zi, z in enumerate(zvals):
imXY = np.rint(255*(S[:,:,zi].T/500)).astype(np.uint8)
imXY[imXY > 255] = 255
entXY.append(pd.DataFrame(entropy(imXY, disk(d)), index=yvals, columns=xvals))
vmin = np.min([vmin, np.min(entXY[-1].values.ravel())])
vmax = np.max([vmax, np.max(entXY[-1].values.ravel())])
for zi, z in enumerate(zvals):
plt.subplot(3,4,1+zi)
sns.heatmap(entXY[zi], xticklabels=20, yticklabels=10, vmin=vmin, vmax=vmax, cbar=False)
plt.gca().set_title('Z = '+str(z))
plt.gca().set_xlabel('X')
plt.gca().set_ylabel('Y')
plt.tight_layout()
plt.show()